#!/usr/bin/env python
# coding: utf-8

# In[60]:


import requests
from lxml import etree
import pandas as pd
from bs4 import BeautifulSoup

# Browser-like User-Agent so the target site does not reject the requests.
header = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0"}

# Read the list of URLs to scrape, one per line.
# Explicit encoding avoids platform-dependent default-codec surprises.
with open('url.txt', 'r', encoding='utf-8') as url_file:
    # Strip trailing newlines/whitespace and drop blank lines, which would
    # otherwise become empty-string URLs and crash requests.get() later.
    urls = [line.strip() for line in url_file if line.strip()]

# Excel writer that collects one sheet per scraped page.
writer = pd.ExcelWriter('comment.xlsx')


# In[63]:


# 遍历每个网址
# Scrape each URL and write its short-comment texts to a dedicated Excel sheet.
for idx, url in enumerate(urls):
    print(url)

    # Fetch the page ONCE and reuse the response for both the XPath
    # extraction and the title lookup (the original fetched every page twice).
    response = requests.get(url, headers=header)

    # Locate the short-comment <span> text nodes via XPath.
    tree = etree.HTML(response.text)
    comments = tree.xpath('//*[@id="comments"]/div/ul/li/div[2]/p/span/text()')
    print(comments)

    # Use the page <title> as the sheet name, falling back to a generic name.
    # soup.title.string can be None even when <title> exists, so guard both.
    soup = BeautifulSoup(response.content, 'html.parser')
    if soup.title and soup.title.string:
        raw_name = soup.title.string.strip()
    else:
        raw_name = f'Sheet{idx + 1}'
    # Excel sheet names are limited to 31 characters and may not contain
    # [ ] : * ? / \ — sanitize, or to_excel() raises at write time.
    for forbidden in '[]:*?/\\':
        raw_name = raw_name.replace(forbidden, '')
    sheet_name = raw_name[:31] or f'Sheet{idx + 1}'

    # One sheet per page, one comment per row.
    df = pd.DataFrame(comments)
    df.to_excel(writer, sheet_name=sheet_name, index=True)

# close() flushes and saves the workbook; ExcelWriter.save() was deprecated
# and removed in pandas 2.0, so the original writer.save() crashes there.
writer.close()
print("Data saved to Excel successfully.")

